

*================================
*SETTINGS
*================================
* Session settings for a non-interactive run of this do-file.
* Do not pause output with --more-- prompts.
set more off
* Command tracing is off; the two commented-out lines below can be re-enabled for debugging.
set trace off
*set trace on 
*set tracedepth 2
* Default graph scheme.
set scheme s1mono
* Line width used for output/log lines.
set linesize 255
* Maximum matrix dimension.
set matsize 2000
* Close every open log; -cap- suppresses the error raised when no log is open.
cap log close _all


*================================
*I. LOAD RAWEST POSSIBLE DATA FROM OEMA
*================================

* Load the raw MEPCOM Army file.
use "./raw/mepcom_army_1990_2014", clear

* Tiebreaker of last resort for the one-record-per-person sort further down:
* the negative raw row number, so an ascending sort puts later raw rows first.
* Stored as long on purpose. The default storage type (float) represents
* integers exactly only up to 2^24 = 16,777,216; beyond that, neighbouring rows
* would share a value and the final tie-break would no longer be deterministic.
g long completesort = -_n 

* Spell out "version" in the AFQT test-version variable names.
rename afqt_test_vrsn afqt_test_version
rename hist_afqt_test_vrsn_1 hist_afqt_test_version_1
rename hist_afqt_test_vrsn_2 hist_afqt_test_version_2
* afqt_test_tmp is not used anywhere in this script.
drop afqt_test_tmp

*================================
*II. PREP DATA IN ORDER TO GET IT TO ONE OBSERVATION PER PERSON
*================================

* Limit the file to records whose application service code starts with "A"
* (active duty Army, Army Guard, or Army Reserves).
*	This removes people who accessed into the Army without applying to it.
* NOTE(review): completesort (= -_n, created right after the load) differs on every row,
*	so the two -duplicates drop- calls below likely remove nothing; confirm intent.
drop if substr(apply_service,1,1) != "A"
duplicates drop

* Variables carried forward; everything else in the raw file is discarded.
keep pid dob_s completesort ///
	apply* afqt* hist* access* min_access* max_af* last_af* high_af* erlst_* ///
	pcpc ve wkwk nomk mkmc mcao gsgs eive csei asas arar ///
	sex race ethnic educ_cert prsvc marst nrdep meps hor* statebrth ///
	physical_dt_s drug_test_dt_s alcohol_test_dt_s

duplicates drop

	* FIRST AFQT DATE, SCORE AND TEST VERSION
	* Start from the current test, then overwrite with historical slot 1 and then
	* historical slot 2 whenever that slot is filled (slot 2 wins over slot 1).
	* NOTE(review): date, score and version are overridden independently of each other,
	*	so a partially filled historical slot can mix values from different tests.
	gen firstafqtdt = afqt_dt_s
	g firstafqt = afqt_pctl
	g firstafqtver = afqt_test_version
	forvalues k = 1/2 {
		replace firstafqtdt = hist_afqt_test_dt_`k'_s if hist_afqt_test_dt_`k'_s != .
		replace firstafqt = hist_afqt_pctl_`k' if hist_afqt_pctl_`k' != .
		replace firstafqtver = hist_afqt_test_version_`k' if hist_afqt_test_version_`k' != ""
	}

	* SORTING VARIABLES
	* Each nm* flag is 1 when the covariate is missing or an unknown code, so an
	* ascending sort prefers records with the covariate filled in.
	* nmapplyarmy is -1 for active duty Army ("AR") applications, which therefore sort first.
	g nmapplyarmy = -(apply_service == "AR")
	g nmdob = (dob_s == .)

	* constant placeholder: it is listed in the sort order but never breaks a tie
	g nmname = 1

	* string covariates where only a blank counts as missing
	foreach v in prsvc race sex {
		g nm`v' = (`v' == "")
	}
	g nmeth = (ethnic == "")
	g nmzip = (hor_zip == "")
	g nmcounty = (hor_county == "")
	g nmcity = (hor_city == "")

	* string covariates with additional unknown codes
	g nmfirstafqtver = inlist(firstafqtver, "", "ZZZ")
	g nmeduc = inlist(educ_cert, "", "99")
	g nmmarst = inlist(marst, "", "Z", "Y")
	g nmmeps = inlist(meps, "", "ZZZ")
	g nmstate = inlist(hor_state, "", "ZZ")
	g nmstatebrth = inlist(statebrth, "", "ZZ")
	g nmcntry = inlist(hor_country, "", "ZZ")

	* numeric covariates
	g nmnrdep = (nrdep == . | nrdep == 99)
	g nmcurrentafqt = (afqt_dt_s == .)
	foreach sub in pcpc ve wkwk nomk mkmc mcao gsgs eive csei asas arar {
		g nm`sub' = (`sub' == .)
	}

	* Sort keys, in priority order: dates first, then the missing-covariate flags,
	* and completesort last.
	local sortorder firstafqtdt apply_dt_s physical_dt_s drug_test_dt_s alcohol_test_dt_s ///
		nmapplyarmy nmdob nmfirstafqtver nmprsvc nmname nmrace nmeth nmeduc nmmarst nmnrdep ///
		nmsex nmzip nmcounty nmmeps nmstate nmcity nmstatebrth nmcntry nmcurrentafqt ///
		nmpcpc nmve nmwkwk nmnomk nmmkmc nmmcao nmgsgs nmeive nmcsei nmasas nmarar completesort
		
		
	
*============================*
*============================*
*III. DROP DUPLICATES: 
	*keep obs with earliest afqt score + earliest application date
	*break the many ties with non-missing covariates
	*there will still be duplicates, hence the completesort
*============================*
*============================*	
		
	* Keep exactly one record per pid: the first one when records are sorted
	* ascending on the keys held in the sortorder local (earliest AFQT date,
	* earliest application date, then the missing-covariate flags, then completesort).
	bysort pid (`sortorder'): keep if _n == 1
	* Stop with an error if pid does not uniquely identify observations.
	isid pid
	
	* Remove the sort helpers. nmname is included here: it is generated with the
	* other nm* flags and would otherwise be left behind in the saved dataset.
	drop nmapplyarmy nmfirstafqtver nmprsvc nmname nmdob nmrace nmeth nmeduc nmmarst nmnrdep ///
			nmsex nmzip nmcounty nmmeps nmstate nmcity nmstatebrth nmcntry nmcurrentafqt nmpcpc nmve nmwkwk nmnomk nmmkmc nmmcao ///
			nmgsgs nmeive nmcsei nmasas nmarar completesort
		
	* These dates were only needed as sort keys.
	drop physical_dt_s drug_test_dt_s alcohol_test_dt_s
 	

*============================*
*============================*
*IV. APPLY SAMPLE RESTRICTIONS
*============================*
*============================*	
	
	* Each restriction is its own -drop- so the log shows how many records it removes.

	*................................................*
	*1. ACTIVE DUTY ARMY APPLICANTS ONLY
	*................................................*
	drop if apply_service != "AR"

	*................................................*
	*2a. NO PRIOR SERVICE
	*................................................*
	drop if prsvc == "Y"

	*................................................*
	*2b. Accession must not precede the first AFQT date by more than 31 days
	*................................................*
	drop if min_access_dt < firstafqtdt - 31

	*................................................*
	*3. First AFQT score must be valid (not zero, not missing)
	*................................................*
	drop if firstafqt == . | firstafqt == 0

	* Additional quality checks (very few records each): a historical test date
	* must not be later than the current test date, and historical slot 2 must
	* not be later than historical slot 1.
	drop if hist_afqt_test_dt_1_s != . & hist_afqt_test_dt_1_s > afqt_dt_s
	drop if hist_afqt_test_dt_2_s != . & hist_afqt_test_dt_2_s > afqt_dt_s
	drop if hist_afqt_test_dt_1_s != . & hist_afqt_test_dt_2_s != . & hist_afqt_test_dt_2_s > hist_afqt_test_dt_1_s
	* Diagnostic only: applications dated before the first AFQT.
	count if apply_dt < firstafqtdt

	*................................................*
	*4. Age at first AFQT must be at least 17 and under 35
	*................................................*
	* Despite its name, age_days is measured in YEARS (days divided by 365.25).
	g age_days = (firstafqtdt - dob_s)/365.25
	drop if age_days < 17
	drop if age_days >= 35 & !missing(age_days)
	drop if missing(age_days)

	*................................................*
	*5. First AFQT taken in FY 1990 - FY 2014
	*................................................*
		drop if firstafqtdt <= d(30sep1989)
		drop if firstafqtdt >= d(01oct2014)

	*................................................*
	*6. Exclude student versions of the AFQT
	*................................................*
	* testver repeats the first-test-version logic; nothing in this section reads it
	* and it is removed during the final clean-up.
	gen testver = afqt_test_version
	forvalues k = 1/2 {
		replace testver = hist_afqt_test_version_`k' if hist_afqt_test_version_`k' != ""
	}

	* Flag first-test versions treated as student tests. Versions 23A/23B/24A/24B
	* count only when the first test falls between 1jul2002 and 30apr2004 (inclusive).
	gen probstudent = inlist(firstafqtver, "18F", "18G", "18H", "18S") ///
		| inlist(firstafqtver, "19F", "19G", "19R") ///
		| inlist(firstafqtver, "23F", "23G", "24F", "24G", "24V", "24W") ///
		| (inlist(firstafqtver, "23A", "23B", "24A", "24B") & inrange(firstafqtdt, d(1jul2002), d(30apr2004)))

	drop if probstudent == 1
	drop probstudent
	* Remaining sample size.
	count

	
*============================* 
*============================*
*============================*
*V. GENERATE OUTCOMES AND COVARIATES
*============================*
*============================*
*============================*

* Ever accessed into any service: an earliest accession date is present
gen access = (min_access_dt != .)

* Accession into specific components, from the accession service code
gen accessarmy = inlist(min_access_serv, "AR", "AG", "AV", "AZ")
gen accessarmy_guard = (min_access_serv == "AG")
gen accessarmy_active = (min_access_serv == "AR")
gen accessarmy_reserves = (min_access_serv == "AV")
gen active = inlist(min_access_serv, "AR", "CR", "FR", "MR", "NR")
gen reserve = inlist(min_access_serv, "AG", "AV", "CV", "FG", "FV", "MV", "NV")
* split across two inlist() calls to stay within the string-argument limit
gen otherserv = inlist(min_access_serv, "CR", "CV", "FG", "FR", "FV") ///
	| inlist(min_access_serv, "MR", "MV", "NR", "NV")

* Sex
gen male = (sex == "M")

* Race / ethnicity
		* Two coding schemes are handled. A record is treated as using the newer one
		* when ethnic is A, B or D and the application date is not before 1sep2002
		* (a missing application date counts as not before).
		gen likely_new_coding = inlist(ethnic, "A", "B", "D") & apply_dt_s >= d(1sep2002)
		* Diagnostic only: ethnic code D on applications before the cutoff.
		count if ethnic == "D" & apply_dt_s < d(1sep2002)

		* older scheme: race C = white, N = black; newer scheme: race E = white, C = black
		gen white = (race == "C" & likely_new_coding == 0) | (race == "E" & likely_new_coding == 1)
		gen black = (race == "N" & likely_new_coding == 0) | (race == "C" & likely_new_coding == 1)
		gen hisp = (inlist(ethnic, "S", "1", "4", "6", "9") & likely_new_coding == 0) ///
			| (ethnic == "A" & likely_new_coding == 1)
		* Make the groups mutually exclusive. Order matters: Hispanic overrides white
		* first, and only afterwards black overrides Hispanic.
		replace white = 0 if hisp == 1
		replace hisp = 0 if black == 1
		drop likely_new_coding

* Dependents and marital status
gen single = (nrdep == 0)
gen married = (marst == "M")

* Retesting: a filled historical slot means at least one earlier test exists
gen retest = (hist_afqt_test_dt_1_s != .)
gen retest_2 = (hist_afqt_test_dt_1_s != . & hist_afqt_test_dt_2_s != .)

* Fiscal year of the first AFQT: FY t runs from 01oct(t-1) up to, not including, 01oct(t)
		g firstafqt_fy = .
		forvalues start = 1988/2016 {
			local fy = `start' + 1
			replace firstafqt_fy = `fy' if firstafqtdt >= d(01oct`start') & firstafqtdt < d(01oct`fy')
		}
	* Tests taken in the last quarter of FY2004 are coded FY 2004.75, because that is
	* the quarter in which the Army adopted the new ASVAB test version (cut-offs unchanged).
	replace firstafqt_fy = 2004.75 if inrange(firstafqtdt, d(1jul2004), d(30sep2004))
	* Indicator for a first test after the AFQT re-norm (includes 2004.75)
	gen postrenorm = (firstafqt_fy > 2004)

* AGGREGATED EDUCATION INDICATORS (from educ_cert)
* unknown + missing
g educ_missing = inlist(educ_cert, "99", "")
* less than hs diploma + credential near completion + completed HS but did not pass the HS exit exam
gen educ_lessthanhs = inlist(educ_cert, "11", "14", "48")
* currently in hs + high school senior
gen educ_inhs = inlist(educ_cert, "12", "13")
g educ_ged = inlist(educ_cert, "21", "22", "23", "24", "25", "26", "27", "28")
g educ_hsdip = (educ_cert == "31")
* includes associates degrees
g educ_some_coll = inlist(educ_cert, "41", "42", "44", "45")
g educ_coll_grad = inlist(educ_cert, "51", "61", "62", "63", "64", "65")


* Applicants processed at a MEPS under USMEPCOM Eastern Command (for balance checks):
* every station whose code starts with "A", plus the listed "B" stations.
**SOURCE: USMEPCOM Website:  http://www.mepcom.army.mil/battalions/index.html
gen eastmeps = substr(meps,1,1) == "A" ///
	| inlist(meps, "B54", "B56", "B57", "B59", "B61", "B42", "B26") ///
	| inlist(meps, "B50", "B27", "B45", "B62", "B28", "B29")
* Station codes starting with "Z" are treated as no MEPS
gen nomeps = substr(meps,1,1) == "Z"


*============================*
*============================*
*VI. LABEL, ORDER, CLEAN  VARIABLES 
*============================*
*============================*

* Variable labels. Stata keeps at most 80 characters of a variable label, so
* every label below is held under that limit.

*RAW DATA 
label var access_dt_s_1 "Accession date" 
label var access_service_1 "Accession service" 
label var apply_dt_s "Date of Application"
label var apply_service "Service Applied to"
label var apply_fy "Fiscal Year of Application"

label var pid "anonymized person id"
label var afqt_pctl "recent afqt score"
label var afqt_dt_s "recent afqt date"
label var afqt_test_version "recent afqt test version"
label var hist_afqt_pctl_1 "2nd most recent afqt score"
label var hist_afqt_pctl_2 "3rd most recent afqt score"
* NOTE(review): the two names below rely on variable abbreviation
* (the script elsewhere uses hist_afqt_test_dt_1_s / hist_afqt_test_dt_2_s).
label var hist_afqt_test_dt_1 "2nd most recent afqt date"
label var hist_afqt_test_dt_2 "3rd most recent afqt date"
label var hist_afqt_test_version_1 "2nd most recent afqt test ver"
label var hist_afqt_test_version_2 "3rd most recent afqt test ver"

label var educ_cert "Educ. Status"
label var sex "Sex"
label var race "Race"
label var ethnic "Ethnicity"
label var prsvc "Prior Service"
label var marst "Marital Status"
label var nrdep  "N. of Dependents"
label var meps "MEPS Station ID"
label var hor_city "Home of Record City"
label var hor_country "Home of Record Country"
label var hor_state "Home of Record State"
label var hor_county "Home of Record County"
label var hor_zip  "Home of Record ZIP"
label var statebrth "State of Birth"


*SUMMARY VARIABLES ACROSS ALL OF A PERSON'S RECORDS
label var min_access_dt_1 "Earliest accession date on any record" 
label var min_access_service "Service Accessed Into" 

				label var max_afqt_dt_s "Most Recent AFQT date on any record"
				label var last_afqt_pctl "Most Recent AFQT score on any record"
				label var high_afqt_pctl "Highest AFQT score on any record"

				label var erlst_frst_afqt_dt_s "Earliest AFQT date on any record"
				label var erlst_frst_afqt_pctl "Earliest AFQT score on any record"
				label var erlst_frst_afqt_test_vrsn "Earliest AFQT version on any record"
				label var erlst_afqt_pctl "(recent) AFQT score on the earliest record"
				label var erlst_apply_dt_s "Application date on earliest record"
				label var erlst_apply_service "Application service on earliest record"


*GENERATED OUTCOMES
label var access "Accessed (Any Service)" 
label var active "Accessed Active Duty (All Services)" 
label var accessarmy "Accessed Army" 
label var accessarmy_active "Accessed Active Duty Army" 
label var accessarmy_reserves "Accessed Army Reserves" 
label var accessarmy_guard "Accessed Army Guard" 
label var otherserv "Accessed Non-Army Service" 
label var reserve "Accessed Reserves (All Services)" 

*FIRST AFQT
label var firstafqt "Earliest AFQT score on Army file"
label var firstafqtdt "Date of Earliest AFQT score on Army file"
* The previous label ran to 96 characters, past the 80-character limit, so its
* tail would have been lost. The short form fits; the full text is kept as a note.
label var firstafqt_fy "FY of earliest AFQT on Army file (2004.75 = Q4 FY2004, ASVAB renorm)"
notes firstafqt_fy: FY of earliest AFQT score (2004.75 is last quarter of 2004 when ASVAB was renormed) on Army file
label var firstafqtver "Test Version of Earliest AFQT score on Army file"

*GENERATED COVARIATES
* age_days holds age in years (days / 365.25), despite the variable name.
label var age_days "Age at First AFQT" 
label var male "Male" 
label var white "White" 
label var black "Black" 
label var hisp "Hispanic" 

label var single "No Dependents" 
label var married "Married" 
* retest is 1 whenever an earlier test is on file, which includes people with
* three tests, so the label says "at least 2" (parallel to retest_2).
label var retest "Took at least 2 AFQTs" 
label var retest_2 "Took at least 3 AFQTs" 

label var educ_missing "Missing Education" 
label var educ_lessthanhs "Max Ed: less than HS"  
label var educ_inhs "Still in HS" 
label var educ_ged "Max Ed: GED or equivalent" 
label var educ_hsdip "Max Ed: HS Diploma" 
label var educ_some_coll "Max Ed: Some College" 
label var educ_coll_grad "Max Ed: Bachelors or higher" 

label var postrenorm "Indicator for post 2004 ASVAB renorm"
label var eastmeps "Eastern Command MEPS"
label var nomeps "No MEPS"

* Discard accession slots 2 through 9 (only the first slot is kept) and the
* scratch test-version variable. The ranges depend on the dataset's variable order.
drop access_dt_s_2-access_dt_s_9 access_service_2-access_service_9 testver

* Show the first AFQT date as a calendar date.
format %d firstafqtdt

* Put identifiers, AFQT measures and outcomes first; unlisted variables keep their relative order.
order pid firstafqtdt firstafqt_fy firstafqt max_afqt_dt_s last_afqt_pctl high_afqt_pctl firstafqtver ///
	apply_dt_s access min_access_dt min_access_serv accessarmy* active reserve otherserv ///
	age_days male white black hisp single married retest retest_2 ///
	educ_* educ_cert access_dt_s_1 access_service_1 access*
describe *

* Sort by first AFQT date and shrink storage types before saving.
sort firstafqtdt
compress

* Final sample size, then write the cleaned file.
count
save "./processed/cleaned_mepcom.dta", replace

